#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :gushiwen.py
# @Time      :2023/11/28 
# @Author    :CL
# @email     :1037654919@qq.com
# copy from  https://blog.51cto.com/u_15467780/4853209#2.3%20%E6%80%BB%E8%A7%82%E4%BB%A3%E7%A0%81
import requests
from bs4 import BeautifulSoup
from urllib import request
import re

def get_defalut():
    """Print the poem texts found on the recommendation ("default") pages.

    For each page number in ``range(1, 2)`` (i.e. only page 1) the page
    ``https://www.gushiwen.org/default_<n>.aspx`` is downloaded, decoded as
    UTF-8 and searched for the hidden ``<textarea id="txtare<ID>">`` elements.
    The text between the opening tag and the first following ``https://`` is
    printed to stdout.

    Returns:
        None. Results are only printed.

    Raises:
        urllib.error.URLError: if the page cannot be fetched (including a
            timeout).
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    # Group 1 is the numeric textarea id, group 2 the poem text.
    poem_pattern = re.compile(
        r'<textarea style=" background-color:#F0EFE2; border:0px;overflow:hidden;" cols="1" rows="1" id="txtare(\d+)">(.*?)https://'
    )
    for page_num in range(1, 2):
        # Build the URL of the page to scrape.
        page_url = 'https://www.gushiwen.org/default_{}.aspx'.format(page_num)
        # Use a context manager so the HTTP response is always closed, and a
        # timeout so an unresponsive server cannot hang the script forever.
        with request.urlopen(page_url, timeout=10) as response:
            html_code = response.read().decode('utf-8')
        # Only the captured text is of interest; the id is ignored.
        for _poem_id, poem_text in poem_pattern.findall(html_code):
            print(poem_text)

def get_shiwen():
    """Print the poem texts from the "shiwen" listing and append them to a file.

    For each page number in ``range(1, 2)`` (i.e. only page 1) the page
    ``https://so.gushiwen.cn/shiwen/default_0AA<n>.aspx`` is downloaded,
    decoded as UTF-8 and searched for the hidden
    ``<textarea id="txtare<ID>">`` elements. Each captured text is printed and
    appended to ``all_poetry.txt`` in the current working directory.

    Returns:
        None. Results are printed and written to ``all_poetry.txt``.

    Raises:
        urllib.error.URLError: if the page cannot be fetched (including a
            timeout).
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    # Group 1 is the numeric textarea id, group 2 the poem text.
    poem_pattern = re.compile(
        r'<textarea style=" background-color:#F0EFE2; border:0px;overflow:hidden;" cols="1" rows="1" id="txtare(\d+)">(.*?)https://'
    )
    for page_num in range(1, 2):
        # Build the URL of the page to scrape.
        page_url = 'https://so.gushiwen.cn/shiwen/default_0AA{}.aspx'.format(page_num)
        # Use a context manager so the HTTP response is always closed, and a
        # timeout so an unresponsive server cannot hang the script forever.
        with request.urlopen(page_url, timeout=10) as response:
            html_code = response.read().decode('utf-8')
        poems = [poem_text for _poem_id, poem_text in poem_pattern.findall(html_code)]
        if not poems:
            # Nothing matched: do not create or touch the output file.
            continue
        # Open the output once per page instead of once per poem, and pin the
        # encoding: with the platform default (e.g. GBK or cp1252) writing
        # Chinese text can fail and the poem would be lost.
        with open('all_poetry.txt', 'a', encoding='utf-8') as f:
            for poem_text in poems:
                print(poem_text)
                try:
                    f.write(poem_text)
                except OSError as e:
                    # Best effort: report the failed write and keep going.
                    print(e)
class Gushiwen():
    """Scraper for author listings and author profile pages on gushiwen.cn.

    Every request is sent with the browser-like headers and the cookies
    stored on the instance.

    Attributes:
        base_url: Site root URL (stored but not used by the methods below).
        timeout: Seconds to wait for the server before a request is aborted.
        headers: HTTP request headers sent with every request.
        cookies: Cookies sent with every request.
    """

    def __init__(self):
        self.base_url = 'https://www.gushiwen.cn/'
        # Without a timeout, requests waits indefinitely on a stalled server.
        self.timeout = 10
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
            # "br" is deliberately not advertised: requests only decodes
            # Brotli when an optional brotli package is installed, otherwise
            # a Brotli-compressed body would come back as unreadable bytes.
            "Accept-Encoding": "gzip, deflate",
            "Referer": "https://so.gushiwen.cn/shiwens/",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache"
        }
        # NOTE(review): these look like values captured from a logged-in
        # browser session (session id, account/phone fields). They are kept
        # unchanged so requests behave as before, but they should be moved
        # out of source control and will presumably expire - confirm.
        self.cookies = {
            "liebiaozhaiyao": "0",
            "Hm_lvt_9007fab6814e892d3020a64454da5a55": "1700447779,1701082851",
            "__bid_n": "187ffc4168bdacd93b4207",
            "FPTOKEN": "R5VH8ZzM7r/JXqKBcz77fq8Sq+81ja6UA8r8HPX3nRejqFRATOYrXSPOBxXt4+A+BaQGd4fpnGHthzl9KnBL+hO55EmsT0BSQlrDWVuGpG6ZnjwjWzLxJvSEr5gmwojdH03KcUvbL+LFWwW6jN13aWqeby9wqdaudKu8MJWouAYZWhwjLsdvAUK2IOtzglz/j4kwBKbiROEMgktccpbKHdCYVFJbw25VYxLtlE9aSpG3RDNR0Q3YacC97bFjcfV32Ix6g6bTzSHn7GASFXrxnrIvCYWcrpgoOXiuqK/YUei5IBaV3D2M0QMM4K/+16shsyWc8QxAP/zx2hElesncyfyWc3n56qAj3P1UYV+Wtgx31d3aQSvQeNXkIBAlJNSFWdh8q1E/UOZFHZQpg8IgjA==|ntdC1yzrNllG4LteLpxd8rWDNd4khMO6V/P5MLOhBDA=|10|5c0eed0d18af7aac6eaa58cab159bc7b",
            "Hm_lpvt_9007fab6814e892d3020a64454da5a55": "1701137996",
            "login": "flase",
            "ticketStr": "205449381%7cgQEy8DwAAAAAAAAAAS5odHRwOi8vd2VpeGluLnFxLmNvbS9xLzAyN0d4SlI5bGVkN2kxeWs0Y3hCMTgAAgSUd2RlAwQAjScA",
            "ASP.NET_SessionId": "uggxtb3j0ng5ooehmwr3weyd",
            "gsw2017user": "4195998%7cC2445FE659F13C4CD84005FEFB503CAF%7c2000%2f1%2f1%7c2000%2f1%2f1",
            "wxopenid": "defoaltid",
            "gswZhanghao": "15801366532",
            "gswPhone": "15801366532",
            "idsShiwen2017": "%2c47894%2c71618%2c71250%2c71206%2c",
            "acw_tc": "7ae1d11f17011361358531536e04c7bc6fc77ca7f958e6d46c4bd7bb23",
            "cdn_sec_tc": "7ae1d11f17011361358531536e04c7bc6fc77ca7f958e6d46c4bd7bb23"
        }

    def _fetch_html(self, url):
        """Download *url* with the instance headers/cookies and return its HTML.

        Prints the final URL and the response object as a short progress line.

        Raises:
            requests.RequestException: on connection errors, timeouts, or a
                4xx/5xx status (via ``raise_for_status``).
        """
        response = requests.get(url, headers=self.headers, cookies=self.cookies,
                                timeout=self.timeout)
        print(response.url, response)
        # Fail loudly on an error page instead of parsing it as if it were data.
        response.raise_for_status()
        response.encoding = 'utf-8'
        return response.text

    def get_author_list(self, url='https://so.gushiwen.cn/authors/'):
        """Return the authors listed on the author index page.

        The page at *url* is downloaded and parsed. Earlier code replaced the
        downloaded HTML with a hard-coded snapshot, so the request and *url*
        had no effect on the result; the live response is parsed now.

        Only links with ``target="_blank"`` inside ``<div class="typecont">``
        containers are collected, so unrelated links elsewhere on the page are
        not mistaken for authors.

        Args:
            url: Address of the author index page.

        Returns:
            A list of ``{'name': <link text>, 'href': <absolute URL>}`` dicts
            in document order; empty when no matching links are found.

        Raises:
            requests.RequestException: if the page cannot be fetched.
        """
        soup = BeautifulSoup(self._fetch_html(url), 'html.parser')
        lists = []
        for container in soup.find_all('div', class_='typecont'):
            for a in container.find_all('a', target='_blank'):
                href = a.get('href')
                if not href:
                    # An anchor without a target address cannot be followed.
                    continue
                name = a.get_text().strip()
                lists.append({'name': name, 'href': 'https://so.gushiwen.cn' + href})
        return lists

    # Fetch the profile information of a single author.
    def get_author_info(self, url='https://so.gushiwen.cn/authorv_3b99a16ff2dd.aspx'):
        """Return the profile information found on an author page.

        Args:
            url: Address of the author page.

        Returns:
            A dict with:
              * ``'name'``: text of the ``<h1>`` inside ``div#sonsyuanwen``;
              * ``'sonsyuanwen'``: text of the ``div.sonspic`` element (the
                key name is kept as-is for existing callers);
              * one entry per section ``div`` whose ``style`` is exactly
                ``position:relative; z-index:0px;``, keyed by the section's
                ``<h2>`` text and holding the section's full text.

        Raises:
            requests.RequestException: if the page cannot be fetched.
            AttributeError: if the page lacks the expected ``div.left``,
                ``div.sonspic`` or ``div#sonsyuanwen``/``h1`` elements.
        """
        soup = BeautifulSoup(self._fetch_html(url), 'html.parser')
        # All profile data sits inside the left-hand column of the page.
        author_infos = soup.find('div', class_='left')
        result = {}
        sonsyuanwen = author_infos.find('div', class_='sonspic').get_text().strip()
        name = author_infos.find('div', id='sonsyuanwen').find('h1').get_text().strip()
        result['name'] = name
        result['sonsyuanwen'] = sonsyuanwen
        infos = author_infos.find_all('div', style='position:relative; z-index:0px;')
        for section in infos:
            heading = section.find('h2')
            if heading is None:
                # A section without a heading has no key to store it under.
                continue
            result[heading.get_text().strip()] = section.get_text().strip()
        return result


if __name__ == '__main__':
    # Script entry point: print an empty line, then fetch the default
    # author's page and print the resulting info dict.
    print()
    scraper = Gushiwen()
    author_profile = scraper.get_author_info()
    print(author_profile)